Squaredifference

逐元素计算两个输入数组对应元素的差的平方。

\[\text{output}_i = (\text{input0}_i - \text{input1}_i)^2\]

对于输入 input0 和 input1 中对应位置的每个元素,计算它们差的平方值。

输入:
  • input0 - 第一个输入数据地址。

  • input1 - 第二个输入数据地址。

  • params - 参数打包成数组,各成员按以下顺序依次存放(对应 params[0] ~ params[6]):
    • input0_dims - input0的维度信息数组(int*)。

    • input1_dims - input1的维度信息数组(int*)。

    • output_dims - output的维度信息数组(int*)。

    • strides0 - input0的步长数组(int*)。

    • strides1 - input1的步长数组(int*)。

    • strides_output - output的步长数组(int*)。

    • num_dims - 维度数(int)。

  • core_mask - 核掩码(仅共享存储版本需要)。

输出:
  • output - 计算结果地址,其大小与输入相同。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持fp32, int8, int16, int32, fp64, cplx64, cplx128

  • MT7004 支持fp16, fp32, int16, int32, cplx64

共享存储版本:

void i8_square_difference_s(int8_t *input0, int8_t *input1, int8_t *output, long long *params, int core_mask)
void i16_square_difference_s(int16_t *input0, int16_t *input1, int16_t *output, long long *params, int core_mask)
void i32_square_difference_s(int32_t *input0, int32_t *input1, int32_t *output, long long *params, int core_mask)
void hp_square_difference_s(half *input0, half *input1, half *output, long long *params, int core_mask)
void fp_square_difference_s(float *input0, float *input1, float *output, long long *params, int core_mask)
void dp_square_difference_s(double *input0, double *input1, double *output, long long *params, int core_mask)
void c64_square_difference_s(float *input0, float *input1, float *output, long long *params, int core_mask)
void c128_square_difference_s(double *input0, double *input1, double *output, long long *params, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <squaredifference.h>
 4
 5int main(int argc, char* argv[]) {
 6    float* input0 = (float*)0x81000000;
 7    float* input1 = (float*)0x82001000;
 8    float* output = (float*)0x83002000;
 9    float* check = (float*)0x84003000;
10    int *strides0 = (int*)0x85004000;
11    int *strides1 = (int*)0x86005300;
12    int *strides_output = (int*)0x87006000;
13
14    // same shape
15    int input0_dims[] = {4, 8, 16};  // 2x2
16    int input1_dims[] = {4, 8, 16};  // 2x2
17    int output_dims[] = {4, 8, 16};      // 2x2
18    int num_dims = 3;
19
20    unsigned long long params[9];
21    params[0] = (unsigned long long)input0_dims;
22    params[1] = (unsigned long long)input1_dims;
23    params[2] = (unsigned long long)output_dims;
24    params[3] = (unsigned long long)strides0;
25    params[4] = (unsigned long long)strides1;
26    params[5] = (unsigned long long)strides_output;
27    params[6] = (unsigned long long)num_dims;
28
29    int total_input0 = get_total_elements(num_dims, input0_dims);
30    int total_input1 = get_total_elements(num_dims, input1_dims);
31    int total_output = get_total_elements(num_dims, output_dims);
32
33    srand(time(0));
34
35    int i;
36    for (i = 0; i < total_input0; ++i) {
37        input0[i] = (float)(rand() % 100) / 10.0f;
38    }
39
40    for (i = 0; i < total_input1; ++i) {
41        input1[i] = (float)(rand() % 100) / 10.0f;
42    }
43
44    int core_mask = 0x0f;
45    fp_square_difference_s(input0, input1, output, params, core_mask);
46    return 0;
47}

私有存储版本:

void i8_square_difference_p(int8_t *input0, int8_t *input1, int8_t *output, long long *params)
void i16_square_difference_p(int16_t *input0, int16_t *input1, int16_t *output, long long *params)
void i32_square_difference_p(int32_t *input0, int32_t *input1, int32_t *output, long long *params)
void hp_square_difference_p(half *input0, half *input1, half *output, long long *params)
void fp_square_difference_p(float *input0, float *input1, float *output, long long *params)
void dp_square_difference_p(double *input0, double *input1, double *output, long long *params)
void c64_square_difference_p(float *input0, float *input1, float *output, long long *params)
void c128_square_difference_p(double *input0, double *input1, double *output, long long *params)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <squaredifference.h>
 4
 5int main(int argc, char* argv[]) {
 6     float* input0 = (float*)0x10010000;
 7    float* input1 = (float*)0x10020000;
 8    float* output = (float*)0x10030000;
 9    float* check = (float*)0x10040000;
10    int *strides0 = (int*)0x10050000;
11    int *strides1 = (int*)0x10053000;
12    int *strides_output = (int*)0x10056000;
13
14    // same shape
15    int input0_dims[] = {4, 8, 16};  // 2x2
16    int input1_dims[] = {4, 8, 16};  // 2x2
17    int output_dims[] = {4, 8, 16};      // 2x2
18    int num_dims = 3;
19
20    unsigned long long params[9];
21    params[0] = (unsigned long long)input0_dims;
22    params[1] = (unsigned long long)input1_dims;
23    params[2] = (unsigned long long)output_dims;
24    params[3] = (unsigned long long)strides0;
25    params[4] = (unsigned long long)strides1;
26    params[5] = (unsigned long long)strides_output;
27    params[6] = (unsigned long long)num_dims;
28
29    int total_input0 = get_total_elements(num_dims, input0_dims);
30    int total_input1 = get_total_elements(num_dims, input1_dims);
31    int total_output = get_total_elements(num_dims, output_dims);
32
33    srand(time(0));
34
35    int i;
36    for (i = 0; i < total_input0; ++i) {
37        input0[i] = (float)(rand() % 100) / 10.0f;
38    }
39
40    for (i = 0; i < total_input1; ++i) {
41        input1[i] = (float)(rand() % 100) / 10.0f;
42    }
43
44    fp_square_difference_p(input0, input1, output, params);
45    return 0;
46}